/* ooc.c - Stuff to handle overused N-mers (tiles) in genome
 * indexing schemes. */
/* Copyright 2001-2003 Jim Kent.  All rights reserved. */
#include "common.h"
#include "ooc.h"
#include "sig.h"

void oocMaskCounts(char *oocFile, bits32 *tileCounts, int tileSize,
                   bits32 maxPat)
/* Set items of tileCounts to maxPat if they are in oocFile.
 * Effectively masks this out of index.
 *
 * oocFile    - name of the overused-tile file, or NULL to do nothing.
 * tileCounts - table indexed by tile value; listed entries are overwritten.
 * tileSize   - tile size the caller expects; must equal the size stored
 *              in the file header.
 * maxPat     - value stored into each listed entry.
 *
 * File layout as read here: a 32-bit signature, a 32-bit tile size, then
 * 32-bit tile values until end of file.  If the signature is oocSigSwapped
 * the size and every tile value are byte-swapped before use.
 * Aborts via errAbort on a bad signature, a tile size mismatch, or a tile
 * value that does not fit in 2*tileSize bits. */
{
  bits32 sig, psz, tile;
  bits32 tileLimit = 0; /* 0 means "no limit known": skip the range check. */
  boolean mustSwap = FALSE;
  FILE *f;

  if (oocFile == NULL)
    return;

  f = mustOpen(oocFile, "rb");
  mustReadOne(f, sig);
  mustReadOne(f, psz);
  if (sig == oocSig)
    mustSwap = FALSE;
  else if (sig == oocSigSwapped) {
    mustSwap = TRUE;
    psz = byteSwap32(psz);
  } else
    errAbort("Bad signature on %s\n", oocFile);
  if (psz != tileSize)
    errAbort("Oligo size mismatch in %s. Expecting %d got %u\n", oocFile,
             tileSize, (unsigned)psz);

  /* Assumes tileCounts has 4^tileSize entries, the same 2-bits-per-base
   * index space oocMaskSimpleRepeats uses.  NOTE(review): confirm against
   * callers.  When 2*tileSize bits fill (or exceed) a 32-bit word every
   * value read is representable, so no limit is applied. */
  if (tileSize >= 0 && tileSize < 16)
    tileLimit = (bits32)1 << (tileSize + tileSize);

  while (readOne(f, tile)) {
    if (mustSwap)
      tile = byteSwap32(tile);
    /* Refuse to write outside the table on a corrupt or mismatched file. */
    if (tileLimit != 0 && tile >= tileLimit)
      errAbort("Tile %u out of range for tile size %d in %s\n",
               (unsigned)tile, tileSize, oocFile);
    tileCounts[tile] = maxPat;
  }
  fclose(f);
}

void oocMaskSimpleRepeats(bits32 *tileCounts, int seedSize, bits32 maxPat)
/* Mask out simple repeats in index.
 *
 * For each of the 16 ordered pairs (i, j) of 2-bit base codes, builds the
 * tile made of the pair repeated over and over, truncates it to the low
 * 2*seedSize bits, and sets that entry of tileCounts to maxPat.  Pairs with
 * i == j give the single-base repeats.
 *
 * tileCounts - table indexed by tile value; must have an entry for every
 *              2*seedSize-bit value.
 * seedSize   - number of bases per tile; a negative value is ignored.
 * maxPat     - value stored into each masked entry. */
{
  int i, j, k;
  int wordBits = (int)(sizeof(bits32) * 8);
  bits32 tileMask;

  if (seedSize < 0)
    return; /* A negative shift count would be undefined behavior. */

  /* Build the mask in unsigned arithmetic.  Shifting by the full word width
   * (seedSize of 16 for a 32-bit word) is undefined, so saturate to all
   * ones in that case instead. */
  if (seedSize + seedSize >= wordBits)
    tileMask = ~(bits32)0;
  else
    tileMask = ((bits32)1 << (seedSize + seedSize)) - 1;

  for (i = 0; i < 4; ++i) {
    for (j = 0; j < 4; ++j) {
      bits32 repeat = 0;
      /* 8 rounds of two bases each: 16 bases, 32 bits of pattern. */
      for (k = 0; k < 8; ++k) {
        repeat <<= 2;
        repeat |= (bits32)i;
        repeat <<= 2;
        repeat |= (bits32)j;
      }
      repeat &= tileMask;
      tileCounts[repeat] = maxPat;
    }
  }
}
